library(MCMCpack)
library(foreign)
library(car)

setwd("~/Google Drive/Broockman/Completed/Extremity vs Consistency/ReplicationData/MainSurveyApril2013")

save <- TRUE

data <- read.dta("ssi-survey-apr-2013.dta", convert.underscore = TRUE)
names(data)

#ANALYSIS OF 7-PART QUESTIONS
multis<-data[,22:33]

multi.counts <- matrix(nrow = 7, ncol = ncol(multis)) #Rows are answers, columns are questions
for(i in 1:nrow(multi.counts)){#Loop over answers, sum the number of responses for each question that have it
	for(j in 1:ncol(multis)){#for each question...
		multi.counts[i,j] <- sum(data$weight[which(multis[,j]==i)])
	}
}
for(i in 1:ncol(multi.counts)){#Get opinion distribution -- Loop over questions, divide by the total number of responses
	multi.counts[,i] <- multi.counts[,i] / sum(multi.counts[,i])
}

multi.names <- c("Gun Control", "Health Care", "Immigration", "Taxes", "Abortion", "Environment", "Medicare", "Gay Rights", "Affirmative Action", "Unions", "Contraception", "Education")
cbind(names(multis), multi.names) #make sure names are right

average.opinion.across.issues <- rowSums(multi.counts)
	average.opinion.across.issues <- average.opinion.across.issues / sum(average.opinion.across.issues)
issue.specific.averages.across.rs <- rowMeans(multis, na.rm = TRUE)



if(save) pdf("masspub_respondents_then_issues_weighted.pdf", width = 900*2/50, height = 580*2/50, pointsize = 16*2)
opar <- par(lwd=3)
hist(issue.specific.averages.across.rs, weights = data$weights, main = "Distribution of Respondents' Average Responses - Mass Public", ylab = "Frequency", xlab = "", xlim = c(.6,7.4), breaks=30, cex=.3)
par(opar)
if(save) dev.off()


makeplot <- function(seven.point, title, ylimtop = .45, sub=""){
	bp <- barplot(seven.point, main = title, ylim=c(0,ylimtop), names.arg = c(1:7), col = c("darkblue", "blue3", "blue", "Purple", "red", "red3", "darkred"), sub=sub)
	text(bp, seven.point + .01, paste0(round(seven.point*100, 0),"%"),cex=.9,pos=3)
}
if(save) pdf("dist_of_multis_weighted.pdf", width = 1800/50, height = 580*2/50, pointsize = 30)
par(mfrow=c(3,4), oma = c(0,0,3,0), mar = c(3,3,3,3))
for(i in 1:ncol(multi.counts)){
	makeplot(multi.counts[,i],multi.names[i], ylimtop=0.55)
}
par(mfrow=c(1,1), oma = c(0,0,0,0))
title(main="Distribution of Opinion on Twelve Issues - Mass Public")
if(save) dev.off()



if(save) pdf("issues_then_respondents_weighted.pdf", width = 1800/50, height = 1000/50, pointsize = 30)
makeplot(average.opinion.across.issues, "Distribution of Responses on Average Issue - Mass Public", sub = "Aggregation Strategy: Create Issue-Level Summaries Across Respondents, Then Aggregate Across Issues",ylimtop=0.25)
if(save) dev.off()



#BEHAVIOR OF IRT MODELS IN THIS DATA
binary.iqs <- data[,1:21]

#Build ideology IRT model from IQ responses
typical.ideology.scale <- colMeans(MCMCirt1d(binary.iqs))
extremity.typical.scale <- abs(typical.ideology.scale)

#Build issue IRT model from knowledge Qs
political.knowledge <- colMeans(MCMCirt1d(data[,38:45]))
	summary(lm(political.knowledge ~ data$knowledge.controlhouse)) #make sure scale goes the right way

#Number of extreme responses given
liberal.extreme.responses <- rowSums(multis <= 2)/ncol(multis)
conservative.extreme.responses <- rowSums(multis >= 6)/ncol(multis)
extreme.responses.sum <- conservative.extreme.responses + liberal.extreme.responses


#Function to make plots
ssplot <- function(x, xlab, y, ylab, main = "", sub = "", weights, ylim=NA){
	if(is.na(ylim)) plot(x, y, xlab = xlab, ylab = ylab, main = main, sub = sub, lwd=0.7, pch=20)
	if(!is.na(ylim)) plot(x, y, xlab = xlab, ylab = ylab, main = main, sub = sub, lwd=0.7, pch=20, ylim=ylim)
	lines(loess.smooth(x,y), lwd=4, col='red')
}


if(save) pdf("knowledge_and_extremity.pdf", width = 1100/50, height = 600/50, pointsize = 22)
par(mfrow=c(1,2))
ssplot(political.knowledge, "Political Knowledge", extremity.typical.scale, "Extremity on Ideological Scale", main = "(a)")
ssplot(political.knowledge, "Political Knowledge", extreme.responses.sum, "Share of Responses Extreme", main = "(b)")
if(save) dev.off()


#How the measures are built
if(save) pdf("extremity_scale_validation.pdf", width = 1100/50, height = 400/50, pointsize = 22)
par(mfrow=c(1,3))
ssplot(typical.ideology.scale, "Typical Ideological Scale", liberal.extreme.responses, "Share of Responses Extreme Liberal", main = "(a)")
ssplot(typical.ideology.scale, "Typical Ideological Scale", conservative.extreme.responses, "Share of Responses Extreme Conservative", main = "(b)",ylim=c(0,1))
ssplot(typical.ideology.scale, "Typical Ideological Scale", extreme.responses.sum, "Share of Responses Extreme", main  = "(c)")
if(save) dev.off()



#How the measures are built - SELF PLACEMENT
if(save) pdf("extremity_scale_validation_selfplace.pdf", width = 1100/50, height = 400/50, pointsize = 22)
par(mfrow=c(1,3))
ssplot(data$personal.ideology + runif(length(data$personal.ideology), min=-.2,max=.2), "Ideological Self-Placement", liberal.extreme.responses + runif(length(data$personal.ideology), min=-.02,max=.02), "Share of Responses Extreme Liberal", main = "(a)")
ssplot(data$personal.ideology + runif(length(data$personal.ideology), min=-.2,max=.2), "Ideological Self-Placement", conservative.extreme.responses + runif(length(data$personal.ideology), min=-.02,max=.02), "Share of Responses Extreme Conservative", main = "(b)",ylim=c(0,1))
ssplot(data$personal.ideology + runif(length(data$personal.ideology), min=-.2,max=.2), "Ideological Self-Placement", extreme.responses.sum + runif(length(data$personal.ideology), min=-.02,max=.02), "Share of Responses Extreme", main  = "(c)")
if(save) dev.off()



#Regressions
#IRT vs. extremism on issues
summary(lm(extreme.responses.sum ~ extremity.typical.scale, weights = data$weight)) ###THE KEY RESULT
	sd(extremity.typical.scale) #to get a sense of magnitude
	
#Self-placement vs. extremism on issues
summary(lm(extreme.responses.sum ~ (abs(4-data$personal.ideology)), weights = data$weight))

#Knowledge and extremism on issues vs. IRT
summary(lm(extremity.typical.scale ~ political.knowledge, weights = data$weight))
summary(lm(extreme.responses.sum ~ political.knowledge, weights = data$weight))

#Extremism and primary votes
summary(lm(extreme.responses.sum ~ data$vote.turnout.primary.2012, weights = data$weight, na.rm=TRUE))
summary(lm(extremity.typical.scale ~ data$vote.turnout.primary.2012, weights = data$weight))